# Loading data
#
# All inputs live in sub-folders of a single project directory. Paths are
# built with file.path() so that moving the data only requires editing
# `data.root`.
data.root <- "/Users/edmondawad/Dropbox (MIT)/Gov2001/Project/Languages datasets"

# Read a tab-separated file (with header row) from a sub-folder of data.root.
# read.delim() is read.csv() with sep = "\t".
load.tsv <- function(folder, file) {
  read.delim(file.path(data.root, folder, file), header = TRUE)
}

# Read a comma-separated file (with header row) from a sub-folder of data.root.
load.csv <- function(folder, file) {
  read.csv(file.path(data.root, folder, file), header = TRUE)
}

## Books
stats.books <- load.tsv("Books", "dataset_stats_books.tsv")
books.nodes <- load.tsv("Books", "books_nodes.tsv")
books.edges <- load.tsv("Books", "books_edges.tsv")

## Wikipedia
stats.wiki <- load.tsv("Wiki", "dataset_stats_wikipedia.tsv")
wiki.nodes <- load.tsv("Wiki", "wikipedia_nodes.tsv")
wiki.edges <- load.tsv("Wiki", "wikipedia_edges.tsv")

## Twitter
stats.twitter <- load.tsv("Twitter", "dataset_stats_twitter.tsv")
twitter.nodes <- load.tsv("Twitter", "twitter_gln_nodes.tsv")
twitter.edges <- load.tsv("Twitter", "twitter_gln_links_with_over_expression.tsv")

## Utility
gdp.country <- load.tsv("Utility", "gdp_pc_population_by_country.tsv")
gdp.language <- load.tsv("Utility", "gdp_pc_population_by_language.tsv")
language.conversion <- load.tsv("Utility", "language_conversion_table_iso639-3.tsv")
language.speakers <- load.tsv("Utility", "language_speakers_families_iso639-3.tsv")

## Famous
famous.murr.country <- load.tsv("Famous individuals", "Famous_murray_by_country.tsv")
famous.murr.language <- load.tsv("Famous individuals", "Famous_murray_by_language.tsv")
famous.wiki.country <- load.tsv("Famous individuals", "Famous_wikipedia_by_country.tsv")
famous.wiki.language <- load.tsv("Famous individuals", "Famous_wikipedia_by_language.tsv")

## Analysis
Analysis <- load.tsv("Analysis", "centralities_by_language.tsv")
# Work with plain strings and strip the " (macrolanguage)" suffix from names.
Analysis$language.name <- as.character(Analysis$language.name)
Analysis$language.name <- gsub(" \\(macrolanguage\\)", "", Analysis$language.name)

## Country names and codes
country <- load.csv("Countries", "count_name_code.csv")
country$Name <- as.character(country$Name)
# Use the short country name.
country$Name[country$Name == "Russian Federation"] <- "Russia"

## GDELT
VC <- load.csv("gdelt", "out_qc1_agg10.csv")
MC <- load.csv("gdelt", "out_qc2_agg10.csv")
VN <- load.csv("gdelt", "out_qc3_agg10.csv")
MN <- load.csv("gdelt", "out_qc4_agg10.csv")

# Output written with a relative path further down (the figure PDF) is
# resolved against the working directory, so keep it pointing at the gdelt
# folder as before.
setwd(file.path(data.root, "gdelt"))
# Package setup.
# A bare detach() removes whatever happens to sit at position 2 of the search
# path; in a fresh session that is typically package:stats, which would make
# lm() and na.omit() unavailable below. Detach only a named package instead.
# NOTE(review): presumably the intent was to drop dplyr before attaching plyr
# (see the commented-out library(dplyr) below) -- confirm.
if ("package:dplyr" %in% search()) {
  detach("package:dplyr")
}
library(plyr)    # ddply(), summarize()
library(tidyr)   # gather(), %>%
library(ggplot2) # ggplot() and friends

#library(dplyr)
# gdelt <- merge(VC,MC,by.x=c("from_lang","to_lang"),by.y=c("from_lang","to_lang"))
# names(gdelt)[3] <- "VC" #"Verbal Cooperation"
# names(gdelt)[4] <- "MC" #"Material Cooperation"
# gdelt <- merge(gdelt,VN,by.x=c("from_lang","to_lang"),by.y=c("from_lang","to_lang"))
# names(gdelt)[5] <-  "VN" #"Verbal Conflict"
# gdelt <- merge(gdelt,MN,by.x=c("from_lang","to_lang"),by.y=c("from_lang","to_lang"))
# names(gdelt)[6] <-  "MN" #"Material Conflict"

# Build one table keyed by (from_lang, to_lang) holding the two GDELT
# interaction counts plus the link strength from each language network.
#
# The columns are named MC / MN and later labelled "Material Cooperation" /
# "Material Conflict", so the material tables (MC, MN) are merged here rather
# than the verbal ones (VC, VN).
# Column positions assume each GDELT table has exactly the two key columns
# followed by one value column -- TODO confirm against the CSV files.
gdelt <- merge(MC, MN, by = c("from_lang", "to_lang"))
names(gdelt)[3] <- "MC" # Material Cooperation
names(gdelt)[4] <- "MN" # Material Conflict

# merge() is an inner join by default: only language pairs present in every
# source survive. The selected edge columns are the two endpoint columns plus
# one value column, chosen by position.
gdelt <- merge(gdelt, twitter.edges[, c(1, 2, 5)],
               by.x = c("from_lang", "to_lang"),
               by.y = c("Source", "Target"))
names(gdelt)[5] <- "twitter"

gdelt <- merge(gdelt, wiki.edges[, c(1, 2, 7)],
               by.x = c("from_lang", "to_lang"),
               by.y = c("SourceLanguageCode", "TargetLanguageCode"))
names(gdelt)[6] <- "wiki"

gdelt <- merge(gdelt, books.edges[, c(1, 2, 7)],
               by.x = c("from_lang", "to_lang"),
               by.y = c("SourceLanguageCode", "TargetLanguageCode"))
names(gdelt)[7] <- "books"

# Append, for every language pair in `gdelt`, the from/to ratio of the values
# in columns 2 and 3 of `gdp.language` (column 1 is the language key).
# NOTE(review): going by the file name, column 2 is presumably GDP per capita
# and column 3 population -- confirm.
#
# The lookup is vectorised with match(): a language missing from the table
# yields NA (dropped later by na.omit) instead of aborting, and an empty
# `gdelt` is handled without error.
lang.key <- as.character(gdp.language[, 1])
# If a language is listed more than once, the last row wins.
lang.table <- gdp.language[!duplicated(lang.key, fromLast = TRUE), ]
lang.key <- as.character(lang.table[, 1])

from.row <- match(as.character(gdelt$from_lang), lang.key)
to.row <- match(as.character(gdelt$to_lang), lang.key)

gdp1 <- lang.table[from.row, 2]
pop1 <- lang.table[from.row, 3]

gdp2 <- lang.table[to.row, 2]
pop2 <- lang.table[to.row, 3]

# The two new columns are named "gdp1/gdp2" and "pop1/pop2" by cbind().
gdelt <- cbind(gdelt, gdp1/gdp2, pop1/pop2)


# Reshape to long format for faceting: one row per
# (language pair, network, interaction type).
plotdata <- gdelt

# colsource/GLN: which network the link strength comes from, and its value.
plotdata2 <- plotdata %>% gather(colsource, GLN, twitter:books)
# rowsource/IR: which GDELT measure the count comes from, and its value.
plotdata2 <- plotdata2 %>% gather(rowsource, IR, MC:MN)

plotdata3 <- plotdata2
plotdata3$GLN <- log10(plotdata3$GLN)
plotdata3$IR <- log10(plotdata3$IR)

# gather() stores the key columns as character vectors, so factor() would
# order the levels alphabetically ("books", "twitter", "wiki") and pair them
# with the wrong labels. Spell out the levels so each label matches its data.
#plotdata3$rowsource = factor(plotdata3$rowsource,labels=c("log(Verbal Cooperation)","log(Material Cooperation)","log(Verbal Conflict)","log(Material Conflict)"))
plotdata3$rowsource <- factor(plotdata3$rowsource,
                              levels = c("MC", "MN"),
                              labels = c("log(Material Cooperation)",
                                         "log(Material Conflict)"))

plotdata3$colsource <- factor(plotdata3$colsource,
                              levels = c("twitter", "wiki", "books"),
                              labels = c("log(Twitter Links)",
                                         "log(Wikipedia Links)",
                                         "log(Book Trans. Links)"))

# log10(0) is -Inf, which na.omit() keeps but lm() rejects; drop such rows
# together with the missing ones.
plotdata3 <- plotdata3[is.finite(plotdata3$GLN) & is.finite(plotdata3$IR), ]
plotdata3 <- na.omit(plotdata3)

# After the two gather() calls the ratio columns appended to `gdelt` sit in
# positions 3 and 4 (right after from_lang / to_lang).
names(plotdata3)[3] <- "GDP_Ratio"
names(plotdata3)[4] <- "Population_Ratio"



# Per-facet fit statistics: adjusted R^2 of IR ~ GLN for every
# (rowsource, colsource) combination.
# NOTE(review): `p` is a hard-coded constant, not computed from the fit; the
# panels therefore always read "p< 0.001" -- confirm this holds for the data.
adjR2p <- ddply(plotdata3, c("rowsource", "colsource"), summarize,
                adjr2 = round(summary(lm(IR ~ GLN))$adj.r.squared, 3),
                p = 0.001)

# Build the figure first so that a failure here does not leave a PDF device
# open.
fig4 <- ggplot(plotdata3, aes(GLN, IR)) +
  geom_point(aes(size = Population_Ratio, color = log10(GDP_Ratio))) +
  scale_size_area(max_size = 18) +
#  geom_text(aes(label=paste(from_lang,"-",to_lang)), size = 2)+
  geom_smooth(method = "lm", se = FALSE) +
  # One annotation per facet, placed at a fixed position inside the panel.
  geom_text(data = adjR2p, aes(label = (paste("R2=", adjr2, "\n p<", p))),
            x = 1, y = 5.7, size = 4) +
  facet_grid(rowsource ~ colsource) +
  xlab("") +
  ylab("") +
  ylim(0.4, 6) +
  xlim(0, 6) +
  theme_bw() +
  scale_color_distiller(type = 'seq', palette = 'Reds', guide = 'colourbar',
                        direction = 1) +
  guides(fill = guide_legend(reverse = TRUE)) +
  guides(color = guide_legend(reverse = TRUE)) +
  theme(text = element_text(size = 18), legend.position = "top",
        aspect.ratio = 1/1,
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        legend.text = element_text(size = 20),
        strip.background = element_rect(color = "black", fill = "white"))

# A ggplot object is only drawn when printed. Auto-printing does not happen
# when the script is run through source(), so print explicitly; otherwise the
# PDF would be empty.
pdf(file = "Fig4.pdf", width = 18, height = 10)
print(fig4)
dev.off()
